# Filter the index lexicon down to the terms that occur in the query file.
#
# Inputs:
#   inputFileName   - query file; for each line the second ":"-separated field
#                     is taken and split on single spaces into query terms.
#   inputFileName2  - lexicon file; each line is split on single spaces, the
#                     first field being a term and the second an integer.
# Output:
#   outputFileName2 - one "<term> <integer>" line for every lexicon entry whose
#                     term is a query term, written in lexicon order.
#
# The same "<term> <integer>" pairs are echoed to stdout, preceded by the
# number of distinct query terms and followed by the final dictionary.
inputFileName = "/data5/team/weijiang/the_new_trip_of_feature_generation/final-150Clueweb09Queries.txt"
inputFileName2 = "/data5/team/weijiang/the_new_trip_of_feature_generation/currentIndexLexiconsWithMetaInfo.txt"
outputFileName2 = "/data5/team/weijiang/the_new_trip_of_feature_generation/queryTermsWithMetaInfo.txt"

# Maps each distinct query term to the integer found for it in the lexicon;
# the value stays 0 for terms the lexicon never mentions.
queryTermsDict = {}

with open(inputFileName, "r") as inputFileHandler:
    for line in inputFileHandler:
        fields = line.strip().split(":")
        if len(fields) < 2:
            # Blank line or line without ":" -- there is no term field to
            # read, so skip it instead of failing with IndexError.
            continue
        for queryTerm in fields[1].split(" "):
            queryTermsDict.setdefault(queryTerm, 0)

# Single-argument print(...) emits the same text under Python 2 and Python 3.
print("len(queryTermsDict): %d" % len(queryTermsDict))

# The output file is opened only after the query terms were loaded, so a
# failure while reading the queries does not leave a truncated output behind.
with open(inputFileName2, "r") as inputFileHandler2:
    with open(outputFileName2, "w") as outputFileHandler2:
        for currentLine in inputFileHandler2:
            currentLine = currentLine.strip()
            if not currentLine:
                # A blank line is skipped rather than treated as end of file,
                # so entries that follow it are still processed.
                continue
            elements = currentLine.split(" ")
            term = elements[0]
            if term in queryTermsDict:
                queryTermsDict[term] = int(elements[1])
                print("%s %s" % (term, queryTermsDict[term]))
                outputFileHandler2.write(term + " " + str(queryTermsDict[term]) + "\n")

# The 1-tuple keeps the dict from being interpreted as a %-format mapping.
print("queryTermsDict(new): %s" % (queryTermsDict,))
    
    
'''
inputFileName = "/data1/team/weijiang/machine-learning-project-related/trec-related/trecCombine09_11/priorityWarcDocumentsIDs_all_sorted_beginning_en0041_78.txt"                                                                              
inputFileHandler = open(inputFileName,"r")

outputFileName1 = "/data1/team/weijiang/machine-learning-project-related/trec-related/trecCombine09_11/priorityWarcDocumentsIDs_all_sorted_62_71.txt"                                                                              
outputFileHandler1 = open(outputFileName1,"w")

outputFileName2 = "/data1/team/weijiang/machine-learning-project-related/trec-related/trecCombine09_11/priorityWarcDocumentsIDs_all_sorted_72_81.txt"                                                                              
outputFileHandler2 = open(outputFileName2,"w")

outputFileName3 = "/data1/team/weijiang/machine-learning-project-related/trec-related/trecCombine09_11/priorityWarcDocumentsIDs_all_sorted_82_91.txt"                                                                              
outputFileHandler3 = open(outputFileName3,"w")

outputFileName4 = "/data1/team/weijiang/machine-learning-project-related/trec-related/trecCombine09_11/priorityWarcDocumentsIDs_all_sorted_92_101.txt"                                                                              
outputFileHandler4 = open(outputFileName4,"w")

outputFileName5 = "/data1/team/weijiang/machine-learning-project-related/trec-related/trecCombine09_11/priorityWarcDocumentsIDs_all_sorted_102_111.txt"                                                                              
outputFileHandler5 = open(outputFileName5,"w")

outputFileName6 = "/data1/team/weijiang/machine-learning-project-related/trec-related/trecCombine09_11/priorityWarcDocumentsIDs_all_sorted_112_121.txt"                                                                              
outputFileHandler6 = open(outputFileName6,"w")

outputFileName7 = "/data1/team/weijiang/machine-learning-project-related/trec-related/trecCombine09_11/priorityWarcDocumentsIDs_all_sorted_122_131.txt"                                                                              
outputFileHandler7 = open(outputFileName7,"w")

for line in inputFileHandler.readlines():
    if line.strip().split("-")[1].startswith("en0"):
        number = int(line.strip().split("-")[1][2:6])
        if number >= 62 and number <= 71:
            outputFileHandler1.write(line)
        elif number >= 72 and number <= 81:
            outputFileHandler2.write(line)
        elif number >= 82 and number <= 91:
            outputFileHandler3.write(line)
        elif number >= 92 and number <= 101:
            outputFileHandler4.write(line)        
        elif number >= 102 and number <= 111:
            outputFileHandler5.write(line)
        elif number >= 112 and number <= 121:
            outputFileHandler6.write(line)
        elif number >= 122 and number <= 131:
            outputFileHandler7.write(line)

outputFileHandler1.close()
outputFileHandler2.close()
outputFileHandler3.close()
outputFileHandler4.close()
outputFileHandler5.close()
outputFileHandler6.close()
outputFileHandler7.close()          
'''
        
        
        
        
        
        
        
        
        
        
        